clear all
set more off
cap log close

** plaatjes=1 if graphs need to be generated, 0 otherwise.
local plaatjes=0

********************************************************************************
***** Project: The Short and Long Term Effects of In-Person Performance Feedback
********************************************************************************
***** A. R. Soetevent & G. J. Romensen
********************************************************************************
***** Treatment Effects Coaching: Sun and Abrahams (2021) CATT analysis
********************************************************************************
***** 
********************************************************************************
***** Latest update: 03-12-2022
********************************************************************************
*global filepath "C:\JPEMicReplication"
*global paperpath "$filepath\TablesGraphs"
local abcd "acceleratie rem bochten fueleconomyLpKM"
log using "$filepath/Logs/X04RegressionsCoachingCATT.log", replace

/*** Notes ***/
*=> No fuel economy observations postfeedback in urban area.
*=> Fuel Economy: kilometers per liter of fuel
*=> ABC dimensions: number of events per 10 kilometers
/*************/

use "$filepath\DEPO\DataMainAnalysisDEPO.dta"

rename bustype bustypes
** Drop all Irisbus observations 
drop if bustypes==3
gen byte ZH = 0 
replace ZH = 1 if regio == "ZH"

** Analysis: based on Treatment region
drop if regio == "ZH"

replace intouro = 0 if intouro ==.

* Drop months with imperfect tracking by coaches 
drop if datum>date("30-4-2016", "DMY")

* Drop eco-coaches [chauf_nr_rug: randomly generated depository numbers!]
local eco_nr "939 1404 519 1286 1610 531"

foreach x of local eco_nr {
drop if chauf_nr_rug==`x'
}

/*** Determine the set of observations used for the analysis ***/
gen byte regobsfuel=1
replace regobsfuel=0 if geplande_ritafstand==. | lnovcheckins==. | punctuality==. | aantal_haltes==. | dep_fueleconomyLpKM==.

gen byte regobsabc=1
replace regobsabc=0  if geplande_ritafstand==. | lnovcheckins==. | punctuality==. | aantal_haltes==. | dep_acceleratie==. | dep_bochten==. | dep_rem==.


****************************************
*** A. Create global list of covariates
****************************************

global covBusType "vdl10 vdl14 iris10 iris10cng iris12 iris12cng intouro"
* (vdl12 = default - more than 50 per cent of observations)

global covEnvironm "ochtendspits avondspits uitleenrit geplande_ritafstand aantal_haltes stadsrit"

global covPassengers "lnovcheckins ovcheckinsmissing"


set matsize 10000

****************************************
* Definition variables for CATT analysis
****************************************
*   Code the cohort categorical variable based on when the individual was first coached, which will be inputted in cohort(varname).
gen coachinghulp = weekindex if postcoaching == 1
bysort chauf_nr_rug: egen TimeFirstCoaching = min(coachinghulp)
drop coachinghulp

label variable TimeFirstCoaching "categorical variable that contains the initial treatment timing (week) of each unit. Set missing for never treated units"

gen byte never_coached = 0 
replace never_coached  = 1 if coachdatum_1 > date("30-4-2016", "DMY")
replace never_coached  = 1 if coachdatum_1 ==. 
label variable never_coached "binary variable that corresponds to the control cohort, = 1 for never-treated units, 0 otherwise" 

* Code the relative time categorical variable.
gen byte rw = weekindex - TimeFirstCoaching 


* Check if there is a sufficient number of treated units for each relative time.  With very few units it might be better to bin the relative times and assume constant treatment effects within the bin.
tab rw

*    We will consider the dynamic effect of coaching on fueleonomy and ABC-outcomes.  We first generate these relative time indicators.
 forvalues k = 10(-1)2 {
       gen c_`k' = rw == -`k'
 }
 
 forvalues k = 0/10 {
       gen c`k' = rw == `k'
 }
	   gen cm10 = (rw < -10)
       gen cp10 = (rw >  10)
 
 
*****************************************************************************
** Estimation effect coaching + plots
*****************************************************************************
gen byte selectie=0

local i = 0
foreach var of local abcd {
local i = `i' + 1
		if `i'<4 {
		replace selectie=regobsabc 
		}
		if `i'==4 {
		replace selectie=regobsfuel 
		}
	if `i'==1 { 
		local ynaam  "Change in number of events per 10km" 
		local yschaal "r(-1.5 .2)" 
		local ylabeltje "-1.5 -1 -0.5 0"
	}
	else if `i'==2 { 
			local ynaam  "Change in number of events per 10km" 
		local yschaal "r(-0.2 0.2)" 
		local ylabeltje "-0.2 -0.1 0 0.1 0.2"
	}
	else if `i'==3 { 
		local ynaam  "Change in number of events per 10km" 
		local yschaal "r(-0.2 0.2)" 
		local ylabeltje "-0.2 -0.1 -0.0 0.1 0.2"
	}
	else if `i'==4 { 
		local ynaam  "Change in fuel economy (liters/100km)" 
		local yschaal "r(-0.8 0.2)" 
		local ylabeltje "-0.8 -0.6 -0.4 -0.2 0 0.2"
	}
	
tab selectie never_coached 
preserve
	duplicates drop chauf_nr_rug, force
	tab  never_coached 
	* 110 drivers never coached.
	tab coachdatum_2
	* 21 drivers received a second session before or at April 30, 2016.
restore

* We use the IW estimator to estimate the dynamic effect on fueleconomyLpKM associated with each relative time.
* With many leads and lags, we need a large matrix size to hold intermediate estimates.
*  Note that Sun and Abraham (2020) only establishes the validity of the IW estimators for balanced panel data without covariates

* a. WITHOUT COVARIATES
eventstudyinteract dep_`var' cm10 c_* c0-c10 cp10 if selectie==1, cohort(TimeFirstCoaching) control_cohort(never_coached) absorb(i.chauf_nr_rug i.weekindex) vce(cluster chauf_nr_rug)
estimates store mSA`i'

*********************************************************************************
*** [Figure 3: Dynamic Treatment Effects of In-Person Coaching [IW estimates]]
*********************************************************************************
*Event study plots
* We may feed the estimates into coefplot for an event study plot.
matrix C = e(b_iw)[1,2..21]
mata st_matrix("A",sqrt(diagonal(st_matrix("e(V_iw)")[2..21, 2..21])))
matrix C = C \ A'
matrix list C

coefplot  matrix(C[1]), se(C[2]) vertical yline(0) xlabel("") xtick(#20) xlabel(1 "-10" 2 "-9" 3 "-8" 4 "-7" 5 "-6" 6 "-5" 7 "-4"  8 "-3"  9 "-2" 10 "0" 11 "1" 12 "2" 13 "3" 14 "4" 15 "5" 16 "6" 17 "7" 18 "8" 19 "9" 20 "10") ///
yscale(`yschaal') ylabel(`ylabeltje') ///
xtitle("week relative to coaching") ytitle("`ynaam'") graphregion(fcolor(white) lcolor(white)) bgcolor(white) ///
name(dtecoach2SA_`var', replace)

graph display dtecoach2SA_`var'
graph save "$paperpath\dtecoach2SA_`var'.png", replace
graph export "$paperpath\dtecoach2SA_`var'.png", replace

* Aggregating event study estimates
*    It is possible to use lincom to estimate the average effect, say over the first 10 weeks after coaching. However, since lincom looks for coefficients and variance covariance matrix stored in e(b) and e(V) a workaround is the following:
 matrix b = e(b_iw)
 matrix V = e(V_iw)
 ereturn post b V
 * Test lags = 0
lincom (c0 + c1 + c2 + c3 + c4 + c5 + c6+ c7+ c8+ c9+ c10)/10
 * Test leads = 0
lincom (c_10 + c_9 + c_8 + c_7 + c_6 + c_5 + c_4+ c_3+ c_2)/9



}

********************************************************************************************************
***  [Table H.11: Dynamic Treatment Effects In-Person Coaching on Driving Performance - IW estimates]
********************************************************************************************************

esttab mSA1 mSA2 mSA3 mSA4 using "$paperpath/TABdteCoachIW.tex", replace f ///
label booktabs b(3) p(3) eqlabels(none) collabels(none) mlabels(none) ///
starlevels($^{*}$ 0.10 $^{**}$ 0.05 $^{***}$ 0.01) ///
cells(b(star  fmt(%9.3f) ) se(par fmt(%9.3f))) ///
stats(N controls driverfe dayfe, fmt(%9.0f) labels("Number of trip-level observations" "Controls" "Driver fixed effects" "Day fixed effects")) ///
varlabels(_cons Constant) ///
prehead("\tabcolsep=0.25cm" "\begin{tabular}{l*{@M}{C{2.0cm}}}\hline\hline" "Dependent variable: &\textbf{Fuel Economy}&\textbf{Acceleration} & \textbf{Braking} & \textbf{Cornering}   \\" "\cline{2-5}") posthead(\hline) ///
prefoot(\hline) postfoot("\hline" "\end{tabular}")

eststo clear


log close


